{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### pytorch基础"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 层和块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.1836,  0.1682,  0.1281,  0.1506,  0.0280,  0.0562,  0.0096, -0.0237,\n",
       "          0.1606,  0.1557],\n",
       "        [-0.2137,  0.2054,  0.0419,  0.0795,  0.0261,  0.0492,  0.0110,  0.0790,\n",
       "          0.2270,  0.2097]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "from torch.nn import functional as F\n",
    "\n",
    "net = nn.Sequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))\n",
    "\n",
    "x = torch.rand(2,20)\n",
    "net(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MLP(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.hidden = nn.Linear(20,256)\n",
    "        self.out = nn.Linear(256,10)\n",
    "    \n",
    "    def forward(self,x):\n",
    "        return self.out(F.relu(self.hidden(x)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.0640, -0.1805,  0.1068,  0.1255, -0.0154,  0.1194, -0.2126, -0.1313,\n",
       "          0.0374, -0.0931],\n",
       "        [ 0.0588, -0.0229,  0.0806,  0.3434,  0.0077,  0.0224, -0.0613,  0.0071,\n",
       "          0.0300, -0.1745]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = MLP()\n",
    "net(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.0490, -0.0295,  0.0419, -0.1512, -0.0194, -0.0046, -0.1043, -0.0795,\n",
       "         -0.1253, -0.1562],\n",
       "        [-0.1410, -0.0109,  0.1561, -0.0872, -0.1428, -0.0764,  0.0208, -0.1255,\n",
       "          0.0794,  0.0326]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "class MySequential(nn.Module):\n",
    "    def __init__(self,*args):\n",
    "        super().__init__()\n",
    "        for block in args:\n",
    "            self._modules[block] = block\n",
    "    def forward(self,x):\n",
    "        for block in self._modules.values():\n",
    "            x = block(x)\n",
    "        return x\n",
    "    \n",
    "net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))\n",
    "net(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.0112, grad_fn=<SumBackward0>)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "class FixedHiddenMLP(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.rand_weight = torch.rand((20,20),requires_grad=False)\n",
    "        self.linear = nn.Linear(20,20)\n",
    "        \n",
    "    def forward(self,x):\n",
    "        x = self.linear(x)\n",
    "        x = F.relu(torch.mm(x,self.rand_weight)+1)\n",
    "        x = self.linear(x)\n",
    "        while x.abs().sum() > 1:\n",
    "            x /= 2\n",
    "        return x.sum()\n",
    "\n",
    "net = FixedHiddenMLP()\n",
    "net(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(-0.2360, grad_fn=<SumBackward0>)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#混合搭配各种组合块\n",
    "class NestMLP(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.net = nn.Sequential(nn.Linear(20,64),nn.ReLU(),\n",
    "                                 nn.Linear(64,32),nn.ReLU())\n",
    "        self.linear = nn.Linear(32,16)\n",
    "    \n",
    "    def forward(self,x):\n",
    "        return self.linear(self.net(x))\n",
    "\n",
    "chimera = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())\n",
    "chimera(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 参数管理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.2476],\n",
       "        [-0.2192]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "\n",
    "net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))\n",
    "X = torch.rand(size=(2,4))\n",
    "net(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([('weight', tensor([[-0.3331, -0.0062, -0.0312,  0.3054,  0.0725,  0.0484, -0.1686,  0.3320]])), ('bias', tensor([-0.2798]))])\n"
     ]
    }
   ],
   "source": [
    "print(net[2].state_dict())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'torch.nn.parameter.Parameter'>\n",
      "Parameter containing:\n",
      "tensor([-0.2798], requires_grad=True)\n",
      "tensor([-0.2798])\n"
     ]
    }
   ],
   "source": [
    "print(type(net[2].bias))\n",
    "print(net[2].bias)\n",
    "print(net[2].bias.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net[2].weight.grad == None     #还没做反向计算，grad为None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))\n",
      "('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))\n"
     ]
    }
   ],
   "source": [
    "#一次性访问所有参数\n",
    "print(*[(name,param.shape) for name,param in net[0].named_parameters()])\n",
    "print(*[(name,param.shape) for name,param in net.named_parameters()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([-0.2798])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net.state_dict()['2.bias'].data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.4469],\n",
       "        [-0.4469]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#从嵌套块收集参数\n",
    "def block1():\n",
    "    return nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,4),\n",
    "                         nn.ReLU())\n",
    "\n",
    "def block2():\n",
    "    net = nn.Sequential()\n",
    "    for i in range(4):\n",
    "        net.add_module(f'block {i}',block1())\n",
    "    return net\n",
    "\n",
    "rgnet = nn.Sequential(block2(),nn.Linear(4,1))\n",
    "rgnet(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sequential(\n",
      "  (0): Sequential(\n",
      "    (block 0): Sequential(\n",
      "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
      "      (1): ReLU()\n",
      "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
      "      (3): ReLU()\n",
      "    )\n",
      "    (block 1): Sequential(\n",
      "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
      "      (1): ReLU()\n",
      "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
      "      (3): ReLU()\n",
      "    )\n",
      "    (block 2): Sequential(\n",
      "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
      "      (1): ReLU()\n",
      "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
      "      (3): ReLU()\n",
      "    )\n",
      "    (block 3): Sequential(\n",
      "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
      "      (1): ReLU()\n",
      "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
      "      (3): ReLU()\n",
      "    )\n",
      "  )\n",
      "  (1): Linear(in_features=4, out_features=1, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "print(rgnet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([ 0.0001, -0.0048,  0.0079, -0.0074]), tensor(0.))"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#内置初始化\n",
    "def init_normal(m):\n",
    "    if type(m) ==  nn.Linear:\n",
    "        nn.init.normal_(m.weight,mean=0,std=0.01)\n",
    "        nn.init.zeros_(m.bias)\n",
    "\n",
    "net.apply(init_normal)           #对net的所有module调用for循环初始化参数\n",
    "net[0].weight.data[0],net[0].bias.data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([1., 1., 1., 1.]), tensor(0.))"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def init_constant(m):\n",
    "    if type(m) == nn.Linear:\n",
    "        nn.init.constant_(m.weight,1)\n",
    "        nn.init.zeros_(m.bias)\n",
    "\n",
    "net.apply(init_constant)\n",
    "net[0].weight.data[0],net[0].bias.data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([ 0.4996,  0.2442,  0.6014, -0.2331])\n",
      "tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])\n"
     ]
    }
   ],
   "source": [
    "def xavier(m):\n",
    "    if type(m) == nn.Linear:\n",
    "        nn.init.xavier_uniform_(m.weight)\n",
    "def init_42(m):\n",
    "    if type(m) == nn.Linear:\n",
    "        nn.init.constant_(m.weight,42)\n",
    "\n",
    "net[0].apply(xavier)\n",
    "net[2].apply(init_42)\n",
    "print(net[0].weight.data[0])\n",
    "print(net[2].weight.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Init weight torch.Size([8, 4])\n",
      "Init weight torch.Size([1, 8])\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.0000, -8.6980, -8.3204, -7.9655],\n",
       "        [-9.6852,  5.7624, -9.2351, -7.1665]], grad_fn=<SliceBackward>)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def my_init(m):\n",
    "    if type(m) == nn.Linear:\n",
    "        print(\n",
    "            'Init',\n",
    "            *[(name,param.shape) for name,param in m.named_parameters()][0])\n",
    "        nn.init.uniform_(m.weight,-10,10)\n",
    "        m.weight.data *= m.weight.data.abs() >= 5        #保留数值大于五的参数\n",
    "net.apply(my_init)\n",
    "net[0].weight[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([True, True, True, True, True, True, True, True])\n",
      "tensor([True, True, True, True, True, True, True, True])\n"
     ]
    }
   ],
   "source": [
    "#参数绑定\n",
    "shared = nn.Linear(8,8)\n",
    "net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,\n",
    "                    nn.ReLU(),nn.Linear(8,1))\n",
    "net(X)\n",
    "print(net[2].weight.data[0] == net[4].weight.data[0])\n",
    "net[2].weight.data[0,0] = 100\n",
    "print(net[2].weight.data[0] == net[4].weight.data[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 自定义层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([-2., -1.,  0.,  1.,  2.])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn.functional as F\n",
    "from torch import nn\n",
    "\n",
    "class CentereLayer(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self,x):\n",
    "        return x - x.mean()\n",
    "layer = CentereLayer()\n",
    "layer(torch.FloatTensor([1,2,3,4,5]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(2.5611e-09, grad_fn=<MeanBackward0>)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = nn.Sequential(nn.Linear(8,128),CentereLayer())\n",
    "Y = net(torch.rand(4,8))\n",
    "Y.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([[0.1550, 0.4470, 0.5650],\n",
       "        [0.1336, 0.9331, 0.0639],\n",
       "        [0.7202, 0.4470, 0.8905],\n",
       "        [0.9378, 0.9549, 0.9151],\n",
       "        [0.0584, 0.2427, 0.9126]], requires_grad=True)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "class MyLinear(nn.Module):\n",
    "    def __init__(self,in_units,units):\n",
    "        super().__init__()\n",
    "        self.weight = nn.Parameter(torch.rand(in_units,units))\n",
    "        self.bias = nn.Parameter(torch.rand(units,))\n",
    "    \n",
    "    def forward(self,X):\n",
    "        linear = torch.matmul(X,self.weight.data) + self.bias.data\n",
    "        return F.relu(linear)\n",
    "\n",
    "dense = MyLinear(5,3)\n",
    "dense.weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0.5848, 1.1631, 0.8622],\n",
       "        [1.1261, 1.8937, 2.0965]])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dense(torch.rand(2,5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[78.5394],\n",
       "        [87.5807]])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = nn.Sequential(MyLinear(64,8),MyLinear(8,1))\n",
    "net(torch.rand(2,64))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 读写文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([0, 1, 2, 3])"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "from torch.nn import functional as F\n",
    "\n",
    "x = torch.arange(4)\n",
    "torch.save(x,'x_file')\n",
    "\n",
    "x2 = torch.load('x_file')\n",
    "x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = torch.zeros(4)\n",
    "torch.save([x,y],'x_files')\n",
    "x2,y2 = torch.load('x_files')\n",
    "(x2,y2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mydict = {'x':x,'y':y}\n",
    "torch.save(mydict,'mydict')\n",
    "mydict2 = torch.load('mydict')\n",
    "mydict2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 加载和保存模型参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MLP(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.hidden = nn.Linear(20,256)\n",
    "        self.output = nn.Linear(256,10)\n",
    "    \n",
    "    def forward(self,x):\n",
    "        return self.output(F.relu(self.hidden(x)))\n",
    "net = MLP()\n",
    "x = torch.rand(size=(2,20))\n",
    "Y = net(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(net.state_dict(),'mlp.params')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MLP(\n",
       "  (hidden): Linear(in_features=20, out_features=256, bias=True)\n",
       "  (output): Linear(in_features=256, out_features=10, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clone = MLP()             #先生成网络，再加载参数\n",
    "clone.load_state_dict(torch.load('mlp.params'))\n",
    "clone.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[True, True, True, True, True, True, True, True, True, True],\n",
       "        [True, True, True, True, True, True, True, True, True, True]])"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Y_clone = clone(x)\n",
    "Y_clone == Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "faker",
   "language": "python",
   "name": "faker"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
